Observed data
Observed log-chlorophyll at representative station for the St. Lucie Estuary
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')
# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>%
rename(date = Date) %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
rename(date = Date) %>%
mutate(
doy = yday(date),
dec_time = decimal_date(date),
yr = year(date),
yr = factor(yr),
mo = month(date, label = T)
) %>%
left_join(fl_dat, by = 'date') %>%
mutate(
flo = log(qsm),
chl = log(1 + chl)
) %>%
select(-q, -qsm, -sal)
# plot, all
p <- ggplot(sl_mod, aes(x = date, y = chl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
GAMs with annual, seasonal trends
Some simple GAMs to explore annual, seasonal trends.
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"te(dec_time, doy, bs = c('tp', 'cc'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods) <- paste0('mod', seq_along(mods))
Summary stats of annual, seasonal models:
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| mod1 |
s(dec_time) |
1.491837 |
1.833950 |
8.0481357 |
0.0017935 |
| mod2 |
s(doy) |
4.209953 |
8.000000 |
6.5199494 |
0.0000000 |
| mod3 |
te(dec_time,doy) |
8.433081 |
11.101531 |
5.1741398 |
0.0000001 |
| mod4 |
s(dec_time) |
1.882782 |
2.353842 |
6.0328718 |
0.0014303 |
| mod4 |
s(doy) |
4.222344 |
8.000000 |
6.6335533 |
0.0000000 |
| mod5 |
s(dec_time) |
1.000000 |
1.000000 |
5.6591977 |
0.0179168 |
| mod5 |
te(dec_time,doy) |
9.559921 |
15.000000 |
3.3266003 |
0.0000000 |
| mod6 |
s(doy) |
4.206482 |
8.000000 |
5.8392276 |
0.0000000 |
| mod6 |
te(dec_time,doy) |
2.741101 |
3.810399 |
3.9993657 |
0.0043228 |
| mod7 |
s(dec_time) |
1.000000 |
1.000000 |
10.6360338 |
0.0012203 |
| mod7 |
s(doy) |
4.164553 |
8.000000 |
5.1577131 |
0.0000000 |
| mod7 |
te(dec_time,doy) |
2.852311 |
15.000000 |
0.2807338 |
0.1611397 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| mod1 |
654.5720 |
0.0350432 |
| mod2 |
621.1283 |
0.1312536 |
| mod3 |
619.4651 |
0.1457643 |
| mod4 |
609.3679 |
0.1649803 |
| mod5 |
619.0978 |
0.1517706 |
| mod6 |
609.1895 |
0.1674090 |
| mod7 |
608.6850 |
0.1711443 |
# prediction data
pred_dat <- crossing(
yr = seq(floor(min(sl_mod$dec_time)), ceiling(max(sl_mod$dec_time))),
doy = seq(1, 365, by = 5)
) %>%
unite('date', yr, doy, sep = '-', remove = F) %>%
mutate(
date = as.Date(date, format = '%Y-%j'),
dec_time = decimal_date(date),
mo = month(date, label = TRUE),
yr = year(date)
) %>%
left_join(., fl_dat[, c('date', 'qsm')]) %>%
mutate(flo = log(qsm)) %>%
select(-qsm)
# predictions
sl_res <- map(mods, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sl_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
GAMs with annual, seasonal, and flow trends
Adding flow data to the model:
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"s(flo, bs = 'tp')",
"te(flo, doy, bs = c('tp', 'cc'))",
"te(flo, dec_time, bs = c('tp', 'tp'))",
"te(dec_time, doy, bs = c('tp', 'cc'))",
"te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods2 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods2) <- paste0('mod', seq_along(mods2))
save(mods2, file = 'data/sl_mods2.RData', compress = 'xz')
Summary stats of best year/season model, year/season/flow model:
data(sl_mods2)
# best model with only season, year
best1 <- map(mods, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods[[.]]
# best model with season, year, flow
best2 <- map(mods2, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods2[[.]]
best <- list(best1 = best1, best2 = best2)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| best1 |
s(dec_time) |
1.0000001 |
1.000000 |
10.6360338 |
0.0012203 |
| best1 |
s(doy) |
4.1645525 |
8.000000 |
5.1577131 |
0.0000000 |
| best1 |
te(dec_time,doy) |
2.8523113 |
15.000000 |
0.2807338 |
0.1611397 |
| best2 |
s(dec_time) |
1.0000008 |
1.000001 |
1.9293923 |
0.1658109 |
| best2 |
s(doy) |
2.9402431 |
8.000000 |
0.7092063 |
0.0456631 |
| best2 |
s(flo) |
6.7839630 |
7.863753 |
2.9627043 |
0.0049629 |
| best2 |
te(flo,doy) |
2.9616367 |
12.000000 |
0.5845557 |
0.0124682 |
| best2 |
te(flo,dec_time) |
0.2050133 |
15.000000 |
0.0193067 |
0.0936091 |
| best2 |
te(dec_time,doy) |
0.0000023 |
15.000000 |
0.0000001 |
0.8125709 |
| best2 |
te(dec_time,doy,flo) |
19.8642287 |
48.000000 |
1.0091690 |
0.0000325 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| best1 |
608.6850 |
0.1711443 |
| best2 |
556.8277 |
0.3351218 |
# predictions
sl_res2 <- map(best, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res2, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
ptheme <- theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
cols <- 'Spectral'
pb1 <- dynagam(best1, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('Best 1')
pb2 <- dynagam(best2, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
ggtitle('Best2')
pleg <- g_legend(pb2)
pb2 <- pb2 +
theme(legend.position = 'none')
grid.arrange(
pleg,
arrangeGrob(pb1, pb2, ncol = 2, bottom = 'lnQ', left = 'lnchl'),
ncol = 1,
heights = c(0.1, 1)
)

Adding nitrogen and turbidity covariates
# formula for best annual, seasonal, flow model
strt <- best2$formula %>%
as.character
smths <- c(
"s(nh, bs = 'tp')",
"s(tss, bs = 'tp')",
"te(nh, tss, bs = c('tp', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
# for the summary table
frmtab <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ')
})
# for the model
frm <- sapply(frmtab, function(x){
paste('chl ~', strt[3], '+', x) %>%
formula
})
frmstab <- c(frmstab, frmtab)
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sl_mod,
na.action = na.exclude
)
})
names(mods3) <- paste0('mod', seq_along(mods3))
Summary of all nutrient, turbidity models
# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| mod1 |
s(dec_time) |
7.1522980 |
8.144604 |
2.2843078 |
0.0226542 |
| mod1 |
s(doy) |
4.4270740 |
8.000000 |
4.7050151 |
0.0000000 |
| mod1 |
s(flo) |
3.5483178 |
4.353752 |
2.7567268 |
0.0253103 |
| mod1 |
te(flo,doy) |
0.8014227 |
15.000000 |
0.0725558 |
0.0790987 |
| mod1 |
te(flo,dec_time) |
2.0738102 |
16.000000 |
0.2060798 |
0.0478041 |
| mod1 |
te(dec_time,doy) |
0.0000009 |
15.000000 |
0.0000000 |
0.6596548 |
| mod1 |
te(dec_time,doy,flo) |
7.4985802 |
48.000000 |
0.3037391 |
0.0030504 |
| mod1 |
s(nh) |
3.1140722 |
3.791727 |
8.7739122 |
0.0000024 |
| mod2 |
s(dec_time) |
7.3595891 |
8.301880 |
1.7741030 |
0.0781964 |
| mod2 |
s(doy) |
3.7288636 |
8.000000 |
2.1351290 |
0.0000375 |
| mod2 |
s(flo) |
6.4627017 |
7.626692 |
1.3052852 |
0.1888331 |
| mod2 |
te(flo,doy) |
0.2021979 |
15.000000 |
0.0149169 |
0.1829650 |
| mod2 |
te(flo,dec_time) |
3.5399133 |
16.000000 |
0.6739463 |
0.0029717 |
| mod2 |
te(dec_time,doy) |
0.0000039 |
15.000000 |
0.0000001 |
0.6162590 |
| mod2 |
te(dec_time,doy,flo) |
9.4311894 |
48.000000 |
0.3786696 |
0.0037678 |
| mod2 |
s(tss) |
4.0133779 |
4.921622 |
3.5508373 |
0.0044754 |
| mod3 |
s(dec_time) |
8.0417018 |
8.725110 |
2.7393263 |
0.0051412 |
| mod3 |
s(doy) |
4.4082508 |
8.000000 |
5.7139093 |
0.0000000 |
| mod3 |
s(flo) |
1.0000209 |
1.000033 |
5.8789383 |
0.0159194 |
| mod3 |
te(flo,doy) |
0.7171574 |
15.000000 |
0.0955845 |
0.0138524 |
| mod3 |
te(flo,dec_time) |
3.8664969 |
16.000000 |
1.2835237 |
0.0000002 |
| mod3 |
te(dec_time,doy) |
0.0000014 |
15.000000 |
0.0000001 |
0.5863846 |
| mod3 |
te(dec_time,doy,flo) |
4.4251546 |
48.000000 |
0.1510152 |
0.0217732 |
| mod3 |
te(nh,tss) |
10.9862923 |
12.320136 |
5.5396180 |
0.0000000 |
| mod4 |
s(dec_time) |
7.8748101 |
8.634556 |
2.2810759 |
0.0215738 |
| mod4 |
s(doy) |
4.3273103 |
8.000000 |
5.6933609 |
0.0000000 |
| mod4 |
s(flo) |
4.6583896 |
5.686054 |
2.8227241 |
0.0111382 |
| mod4 |
te(flo,doy) |
0.0283851 |
15.000000 |
0.0033107 |
0.0968418 |
| mod4 |
te(flo,dec_time) |
2.6963868 |
16.000000 |
0.3558319 |
0.0125090 |
| mod4 |
te(dec_time,doy) |
0.0000014 |
15.000000 |
0.0000001 |
0.6166412 |
| mod4 |
te(dec_time,doy,flo) |
6.0892601 |
48.000000 |
0.2285051 |
0.0098800 |
| mod4 |
s(nh) |
3.3583330 |
4.074465 |
9.0342260 |
0.0000006 |
| mod4 |
s(tss) |
4.5158071 |
5.487045 |
3.8275540 |
0.0016388 |
| mod5 |
s(dec_time) |
8.0484777 |
8.729073 |
2.7767507 |
0.0047801 |
| mod5 |
s(doy) |
4.3976198 |
8.000000 |
5.3574757 |
0.0000000 |
| mod5 |
s(flo) |
1.0000029 |
1.000005 |
5.6587753 |
0.0180052 |
| mod5 |
te(flo,doy) |
0.9753113 |
15.000000 |
0.1244896 |
0.0173888 |
| mod5 |
te(flo,dec_time) |
3.9431131 |
16.000000 |
1.2111145 |
0.0000004 |
| mod5 |
te(dec_time,doy) |
0.0000025 |
15.000000 |
0.0000001 |
0.6013498 |
| mod5 |
te(dec_time,doy,flo) |
4.3699401 |
48.000000 |
0.1461156 |
0.0242046 |
| mod5 |
s(nh) |
3.6223225 |
4.509054 |
5.9038455 |
0.0000760 |
| mod5 |
te(nh,tss) |
9.8228824 |
20.000000 |
1.5171723 |
0.0001388 |
| mod6 |
s(dec_time) |
7.8567156 |
8.626056 |
2.4772317 |
0.0121889 |
| mod6 |
s(doy) |
4.2434324 |
8.000000 |
5.0031988 |
0.0000000 |
| mod6 |
s(flo) |
3.3138514 |
4.071771 |
3.5596082 |
0.0069834 |
| mod6 |
te(flo,doy) |
0.2255661 |
15.000000 |
0.0291191 |
0.0744306 |
| mod6 |
te(flo,dec_time) |
2.5038558 |
16.000000 |
0.3256927 |
0.0122653 |
| mod6 |
te(dec_time,doy) |
0.0000007 |
15.000000 |
0.0000000 |
0.6201662 |
| mod6 |
te(dec_time,doy,flo) |
6.3129187 |
48.000000 |
0.2404234 |
0.0083718 |
| mod6 |
s(tss) |
4.5950839 |
5.576246 |
3.6017473 |
0.0024819 |
| mod6 |
te(nh,tss) |
2.2494458 |
14.000000 |
2.7791025 |
0.0000000 |
| mod7 |
s(dec_time) |
7.9410958 |
8.671029 |
2.5158647 |
0.0107913 |
| mod7 |
s(doy) |
4.3386360 |
8.000000 |
4.7255731 |
0.0000000 |
| mod7 |
s(flo) |
3.1281958 |
3.841844 |
3.4897305 |
0.0089717 |
| mod7 |
te(flo,doy) |
0.3594447 |
15.000000 |
0.0451695 |
0.0808500 |
| mod7 |
te(flo,dec_time) |
2.5392626 |
16.000000 |
0.3378472 |
0.0106254 |
| mod7 |
te(dec_time,doy) |
0.0000004 |
15.000000 |
0.0000000 |
0.6355450 |
| mod7 |
te(dec_time,doy,flo) |
5.9550492 |
48.000000 |
0.2262731 |
0.0089427 |
| mod7 |
s(nh) |
1.0000004 |
1.000001 |
0.0085306 |
0.9264745 |
| mod7 |
s(tss) |
4.4162128 |
5.369065 |
3.7807464 |
0.0019287 |
| mod7 |
te(nh,tss) |
2.1696083 |
15.000000 |
1.7236509 |
0.0000000 |
# summary stats of GAMs
map(mods3, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
mutate(smth_added = frmstab) %>%
select(name, smth_added, everything()) %>%
kable
| mod1 |
s(nh, bs = ‘tp’) |
497.8121 |
0.3680850 |
| mod2 |
s(tss, bs = ‘tp’) |
534.1996 |
0.3471245 |
| mod3 |
te(nh, tss, bs = c(‘tp’, ‘tp’)) |
481.6626 |
0.4065122 |
| mod4 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) |
481.3778 |
0.4071965 |
| mod5 |
s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
481.8319 |
0.4105542 |
| mod6 |
s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
478.9757 |
0.4079437 |
| mod7 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
479.7727 |
0.4073784 |
Summary stats of best three three models:
# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods3[[.]]
best <- list(best1 = best1, best2 = best2, best3 = best3)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| best1 |
s(dec_time) |
1.0000001 |
1.000000 |
10.6360338 |
0.0012203 |
| best1 |
s(doy) |
4.1645525 |
8.000000 |
5.1577131 |
0.0000000 |
| best1 |
te(dec_time,doy) |
2.8523113 |
15.000000 |
0.2807338 |
0.1611397 |
| best2 |
s(dec_time) |
1.0000008 |
1.000001 |
1.9293923 |
0.1658109 |
| best2 |
s(doy) |
2.9402431 |
8.000000 |
0.7092063 |
0.0456631 |
| best2 |
s(flo) |
6.7839630 |
7.863753 |
2.9627043 |
0.0049629 |
| best2 |
te(flo,doy) |
2.9616367 |
12.000000 |
0.5845557 |
0.0124682 |
| best2 |
te(flo,dec_time) |
0.2050133 |
15.000000 |
0.0193067 |
0.0936091 |
| best2 |
te(dec_time,doy) |
0.0000023 |
15.000000 |
0.0000001 |
0.8125709 |
| best2 |
te(dec_time,doy,flo) |
19.8642287 |
48.000000 |
1.0091690 |
0.0000325 |
| best3 |
s(dec_time) |
8.0484777 |
8.729073 |
2.7767507 |
0.0047801 |
| best3 |
s(doy) |
4.3976198 |
8.000000 |
5.3574757 |
0.0000000 |
| best3 |
s(flo) |
1.0000029 |
1.000005 |
5.6587753 |
0.0180052 |
| best3 |
te(flo,doy) |
0.9753113 |
15.000000 |
0.1244896 |
0.0173888 |
| best3 |
te(flo,dec_time) |
3.9431131 |
16.000000 |
1.2111145 |
0.0000004 |
| best3 |
te(dec_time,doy) |
0.0000025 |
15.000000 |
0.0000001 |
0.6013498 |
| best3 |
te(dec_time,doy,flo) |
4.3699401 |
48.000000 |
0.1461156 |
0.0242046 |
| best3 |
s(nh) |
3.6223225 |
4.509054 |
5.9038455 |
0.0000760 |
| best3 |
te(nh,tss) |
9.8228824 |
20.000000 |
1.5171723 |
0.0001388 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| best1 |
608.6850 |
0.1711443 |
| best2 |
556.8277 |
0.3351218 |
| best3 |
481.8319 |
0.4105542 |
# predictions
sl_res3 <- map(best, function(x){
sl_mod %>%
mutate(
pred = predict(x)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sl_res3, aes(x = date)) +
geom_point(data = sl_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)